# ================================================================================== #
# Complaints About Police Misconduct Have Adverse Effects for Black Civilians (PSRM)
# - Script: Appendix C (Additional Analyses)
# - Author: Patrick Kraft (patrickwilli.kraft@uc3m.es)
# ================================================================================== #


# Load raw data, packages, and custom functions ---------------------------

source(here::here("00-func.R"))
load(here("sqf_ccrb.Rdata"))


# Appendix C.1: Racial Placebo Test (Complaints by Whites against  --------

m3placebo <- NULL

## variable selection for VAR
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## by white/other police officer
series <- c("po_cw","pw_cw")
for(i in series){
  endo <- c("sqf_d",paste0("ccrb_",i,"_d"),"arrests_d")
  tmp <- na.omit(left_join(black, controls)[, c(endo, exo)])
  m3placebo[[i]] <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")
}

## prepare estimates for plot
m3placebo_coef <- m3placebo %>% 
  map_dfr(~filter(prePlot(.$varresult, NeweyWest = F, report = "ccrb"), Model=="sqf_d")) %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m3placebo[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",4:1)),
         Group = factor(gsub("_d\\..*","", Variables), 
                        levels = c("ccrb_po_cw","ccrb_pw_cw"), 
                        labels = c("White Complaints, Non-White Officer (in CCRB)", 
                                   "White Complaints, White Officer (in CCRB)")))

## coefficient plot
p3a <- ggplot(m3placebo_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales="free_y") +
  geom_text(label = paste0("p = ", m3placebo_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints by Whites") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

## simulate series
m3placebo_series <- rbind(
  varSim(m3placebo[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_po_cw_d = max(black[,"ccrb_po_cw_d"],na.rm = T)),
         level = apply(black[,c("sqf","ccrb_po_cw","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "White Complaints, Non-White Officer (in CCRB)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m3placebo[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_pw_cw_d = max(black[,"ccrb_pw_cw_d"],na.rm = T)),
         level = apply(black[,c("sqf","ccrb_pw_cw","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "White Complaints, White Officer (in CCRB)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("White Complaints, Non-White Officer (in CCRB)",
                                          "White Complaints, White Officer (in CCRB)")))

## compute confidence intervals for simulations
m3placebo_ci <- m3placebo_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m3placebo_text <- m3placebo_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(black[,"ccrb_po_cw_d"],na.rm = T), max(black[,"ccrb_pw_cw_d"],na.rm = T)),
         shock = paste0(" + ",round(shock, 1) * 100," complaints"),
         time = 0)

## plot simulations
p3b <- ggplot(m3placebo_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + 
  geom_vline(xintercept = 2) + facet_wrap(~Group,ncol=1) + xlab("Month") + 
  ggtitle("(B) Predicted Monthly SQF Incidents") + 
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m3placebo_ci[m3placebo_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m3placebo_ci[m3placebo_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m3placebo_ci[m3placebo_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m3placebo_text)

## save combined plot
ggsave("out/figC1_racialplacebo.png", 
       grid.arrange(p3a, p3b, ncol=2), 
       height=3.5, width=6.5)


# Appendix C.2a: Contraband/Weapon Found (in SQF) -------------------------

m5found <- NULL

## by prooductivity of stop
series <- c("found0","found1")
for(i in series){
  endo <- c(paste0("sqf_",i,"_d"),"ccrb_d","arrests_d")
  tmp <- na.omit(left_join(black, controls)[,c(endo, exo)])
  m5found[[i]] <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")
}

## prepare estimates for plot
m5found_coef <- map2_dfr(.x = m5found, .y = paste0("sqf_", series, "_d"), 
                         ~filter(prePlot(.x$varresult, NeweyWest = F, report = "ccrb"), Model==.y)) %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m5found[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ", 5:1)),
         Group = factor(Model, c("sqf_found0_d","sqf_found1_d"),
                        labels = c("SQF (Black Suspects) - Contraband/Weapon: No",
                                   "SQF (Black Suspects) - Contraband/Weapon: Yes")))

p5a <- ggplot(m5found_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales = "free_y") +
  geom_text(label = paste0("p = ", m5found_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints (Black Complainants)") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

m5found_series <- rbind(
  varSim(m5found[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)), 
         level = apply(black[,c("sqf_found0","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "SQF (Black Suspects) - Contraband/Weapon: No", SQF = sqf_found0_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m5found[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)), 
         level = apply(black[,c("sqf_found1","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "SQF (Black Suspects) - Contraband/Weapon: Yes", SQF = sqf_found1_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("SQF (Black Suspects) - Contraband/Weapon: No",
                                          "SQF (Black Suspects) - Contraband/Weapon: Yes")))

m5found_ci <- m5found_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m5found_text <- m5found_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(black[,"ccrb_d"],na.rm = T), max(black[,"ccrb_d"],na.rm = T)), 
         time = 0) %>% mutate(shock = paste0(" + ",round(shock, 1) * 100," complaints"))

p5b <- ggplot(m5found_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + geom_vline(xintercept = 2) +
  facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") +
  ggtitle("(B) Simulated Monthly SQF Incidents") + #ylim(13,23) +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m5found_ci[m5found_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m5found_ci[m5found_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m5found_ci[m5found_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m5found_text)

ggsave("out/figC2a_found.png", 
       grid.arrange(p5a,p5b,ncol=2), 
       height=3.5, width=6.5)


# Appendix C.2b: Arrest Made vs. not (in SQF) -----------------------------

m5arrest <- NULL

## estimate model
series <- c("arrest0","arrest1")
for(i in series){
  endo <- c(paste0("sqf_",i,"_d"),"ccrb_d","arrests_d")
  tmp <- na.omit(left_join(black, controls)[,c(endo, exo)])
  m5arrest[[i]] <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")
}

## prepare estimates for plot
m5arrest_coef <- map2_dfr(.x = m5arrest, .y = paste0("sqf_", series, "_d"), 
                         ~filter(prePlot(.x$varresult, NeweyWest = F, report = "ccrb"), Model==.y)) %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m5arrest[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ", 5:1)),
         Group = factor(Model, levels = c("sqf_arrest0_d","sqf_arrest1_d"), 
                        labels = c("Blacks w/o Arrest (in SQF)", "Blacks w/ Arrest (in SQF)")))

p5a <- ggplot(m5arrest_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales = "free_y") +
  geom_text(label = paste0("p = ", m5arrest_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints (Black Complainants)") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

m5arrest_series <- rbind(
  varSim(m5arrest[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)), 
         level = apply(black[,c("sqf_arrest0","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Blacks w/o Arrest (in SQF)", SQF = sqf_arrest0_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m5arrest[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)), 
         level = apply(black[,c("sqf_arrest1","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Blacks w/ Arrest (in SQF)", SQF = sqf_arrest1_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("Blacks w/o Arrest (in SQF)",
                                          "Blacks w/ Arrest (in SQF)")))

m5arrest_ci <- m5arrest_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m5arrest_text <- m5arrest_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(black[,"ccrb_d"],na.rm = T), max(black[,"ccrb_d"],na.rm = T)), 
         time = 0) %>% mutate(shock = paste0(" + ",round(shock, 1) * 100," complaints"))

p5b <- ggplot(m5arrest_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + geom_vline(xintercept = 2) +
  facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") +
  ggtitle("(B) Simulated Monthly SQF Incidents") + #ylim(13,23) +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m5arrest_ci[m5arrest_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m5arrest_ci[m5arrest_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m5arrest_ci[m5arrest_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m5arrest_text)

ggsave("out/figC2b_arrest.png", 
       grid.arrange(p5a,p5b,ncol=2), 
       height=3.5, width=6.5)


# Appendix C.3: Spatial Placebo Test (in Brooklyn) ------------------------

m6brooklyn <- NULL

## By borough
boro <- c("Queens","Manhattan")
for(i in boro){
  endo <- c("sqf_Brooklyn_d", paste0("ccrb_",i,"_d"),"arrests_d")
  tmp <- na.omit(left_join(black, controls)[,c(endo, exo)])
  m6brooklyn[[i]] <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")
}

## prepare estimates for plot
m6brooklyn_coef <- m6brooklyn %>%
  map_dfr(~filter(prePlot(.$varresult, NeweyWest = F, report = "ccrb"), Model=="sqf_Brooklyn_d")) %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m6brooklyn[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",5:1)),
         Group = factor(gsub("_d\\..*","", Variables), 
                        levels = paste0("ccrb_",boro), 
                        labels = c("Black Complaints in Queens (CCRB)", 
                                   "Black Complaints in Manhattan (CCRB)")))

p4a <- ggplot(m6brooklyn_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + facet_wrap(~Group, ncol=1) +
  geom_text(label = paste0("p = ", m6brooklyn_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

m6brooklyn_series <- rbind(
  varSim(m6brooklyn[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm = T), 6)), 
         shock = list(ccrb_Queens_d = max(black[,"ccrb_Queens_d"],na.rm = T)), 
         level = apply(black[,c("sqf_Brooklyn","ccrb_Queens","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Black Complaints in Queens (CCRB)", SQF = sqf_Brooklyn_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m6brooklyn[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm = T), 6)), 
         shock = list(ccrb_Manhattan_d = max(black[,"ccrb_Manhattan_d"],na.rm = T)), 
         level = apply(black[,c("sqf_Brooklyn","ccrb_Manhattan","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Black Complaints in Manhattan (CCRB)", SQF = sqf_Brooklyn_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("Black Complaints in Queens (CCRB)", 
                                          "Black Complaints in Manhattan (CCRB)")))

m6brooklyn_ci <- m6brooklyn_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m6brooklyn_text <- m6brooklyn_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(black[,"ccrb_Queens_d"],na.rm = T), 
                   max(black[,"ccrb_Manhattan_d"],na.rm = T)), 
         time = 0) %>% mutate(shock = paste0(" + ",round(shock, 1) * 100," complaints"))

p4b <- ggplot(m6brooklyn_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + geom_vline(xintercept = 2) +
  facet_wrap(~Group,ncol=1) + xlab("Month") +
  ggtitle("(B) Predicted SQF Incidents (Blacks/Brooklyn)") +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m6brooklyn_ci[m6brooklyn_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m6brooklyn_ci[m6brooklyn_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m6brooklyn_ci[m6brooklyn_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m6brooklyn_text)
p4b

ggsave("out/figC3_spatialplacebo.png", 
       grid.arrange(p4a,p4b,ncol=2), 
       height=3.5, width=6.5)


# Appendix C.4: Controlling for Precinct Median Household Income ----------

m7income <- NULL

## by officer discretion
series <- c("pctpoor_Bronx", "pctpoor_Brooklyn", "pctpoor",
            "pctrich_Bronx", "pctrich_Brooklyn", "pctrich")
for(i in series){
  endo <- c(paste0("sqf_",i,"_d"),paste0("ccrb_",i,"_d"),"arrests_d")
  tmp <- na.omit(left_join(black, controls)[,c(endo, exo)])
  m7income[[i]] <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")
}

## prepare estimates for plot
m7income_coef <- map2_dfr(.x = m7income, .y = paste0("sqf_",series,"_d"), 
                    ~filter(prePlot(.x$varresult, NeweyWest = F, report = "ccrb"), Model==.y)) %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m7income[[1]]$obs, lower.tail = F), 2),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",5:1)),
         Group = factor(Model, levels = paste0("sqf_",series,"_d"), 
                        labels = c("Median Household Income\nBelow Median<>Bronx",
                                   "Median Household Income\nBelow Median<>Brooklyn",
                                   "Median Household Income\nBelow Median<>New York City",
                                   "Median Household Income\nAbove Median<>Bronx",
                                   "Median Household Income\nAbove Median<>Brooklyn",
                                   "Median Household Income\nAbove Median<>New York City"))) %>% 
  separate(Group, into = c("percent","boro"), sep = "<>")

## coefficient plot
ggplot(m7income_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + facet_grid(percent~boro, scales = "free_y") +
  geom_text(label = paste0("p = ", m7income_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + 
  labs(y = "Lag", x = "Coefficient")

## save plot
ggsave("out/figC4_pctincome.png", 
       height=3, width=7)

